
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

*Table of Contents
local prepareExport = 1
local queryGoogle = 1
local readInTimes = 1

***********************************************
***********************************************
* prepare location datasets
***********************************************
***********************************************

if `prepareExport' == 1 {

***********************************************
* prepare school location dataset
***********************************************

use "$basedata/FOCAL_school_year_data", clear
keep ncerdc_lea ncerdc_schlcode sy elem_school_sample school_latitude school_longitude 
keep if ncerdc_lea==XXX // hiding identity
destring school_latitude, replace
destring school_longitude, replace

drop if missing(school_latitude) | missing(school_longitude)
sort ncerdc_lea ncerdc_schlcode sy
bysort ncerdc_lea ncerdc_schlcode: keep if _n==_N
keep school_latitude school_longitude

duplicates drop

save "$temp/FOCAL_school_locations_for_commute", replace
outsheet using "$temp/FOCAL_school_locations_for_commute.csv", comma replace

***********************************************
* prepare teacher location dataset
***********************************************
use "$basedata/FOCAL_applicant_year_data", clear

keep applicant_id FOCALEmployeeID ncerdc_id zip5 teacher_app_latitude teacher_app_longitude

duplicates drop 

*All other info is implied by applicant_id (so teacher_app_latitude teacher_app_longitude map with zip5)
isid applicant_id

gen num_applicants = 1

collapse (sum) num_applicants, by(zip5 teacher_app_latitude teacher_app_longitude)

drop if zip5==.
isid zip5

save "$temp/FOCAL_applicant_locations_for_commute", replace
outsheet using "$temp/FOCAL_applicant_locations_for_commute.csv", comma replace


***********************************************
* prepare school x teacher location dataset
***********************************************
*School Coordinates
use "$basedata/FOCAL_school_year_data", clear
keep ncerdc_lea ncerdc_schlcode sy elem_school_sample school_latitude school_longitude 
keep if ncerdc_lea==XXX // hiding identity
destring school_latitude, replace
destring school_longitude, replace

drop if missing(school_latitude) | missing(school_longitude)
sort ncerdc_lea ncerdc_schlcode sy
bysort ncerdc_lea ncerdc_schlcode: keep if _n==_N
keep school_latitude school_longitude

duplicates drop

tempfile temp_school_cords
save `temp_school_cords', replace

*Applicant Coordinates
use "$basedata/FOCAL_applicant_year_data", clear

keep applicant_id FOCALEmployeeID ncerdc_id zip5 teacher_app_latitude teacher_app_longitude

duplicates drop 

*All other info is implied by applicant_id (so teacher_app_latitude teacher_app_longitude map with zip5)
isid applicant_id

gen num_applicants = 1

collapse (sum) num_applicants, by(zip5 teacher_app_latitude teacher_app_longitude)

drop if zip5==.
isid zip5

tempfile temp_applicant_cords
save `temp_applicant_cords', replace

cross using `temp_school_cords'

geodist school_latitude school_longitude teacher_app_latitude teacher_app_longitude if school_latitude!=. & school_longitude!=. & teacher_app_latitude!=. & teacher_app_longitude!=., gen(dist)

bys zip5: egen min_dist = min(dist)

*Keep only close enough teachers
keep if min_dist<=40 
drop min_dist

keep zip5 teacher_app_latitude teacher_app_longitude school_latitude school_longitude num_applicants dist

sort zip5 school_latitude school_longitude

save "$temp/FOCAL_applicant_school_locations_for_commute", replace
outsheet using "$temp/FOCAL_applicant_school_locations_for_commute.csv", comma replace

}

***********************************************
***********************************************
* query Google for commute times and read them in
***********************************************
***********************************************

if `queryGoogle' == 1 {
	import delimited using "$temp/FOCAL_applicant_school_locations_for_commute", clear
	cap rename ïzip5 zip5
	saveold  "$temp/commute_distance_matrix_prep", version(12) replace
	

	/*Done in R
	
		library("foreign", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
	library("googleway", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
	library("writexl", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

	api_key = "AIzaSyBwp-fNhwG36vtbrNHsS53PwWwLhbDJc5A"

	commutes <-read.dta("$temp/commute_distance_matrix_prep.dta")
	commutes$commute_time = NaN
	commutes$commute_distance = NaN
	arrive_by = as.POSIXct(1614945600, origin="1970-01-01")

	for (i in 1:nrow(commutes)){
	  g_result = google_distance(origins = list(c(commutes$teacher_app_latitude[i],commutes$teacher_app_longitude[i])), 
								 destinations = list(c(commutes$school_latitude[i],commutes$school_longitude[i])), 
								 mode = "driving",
								 arrival_time = arrive_by,
								 key=api_key)
	  commutes$commute_time[i] = g_result[["rows"]][["elements"]][[1]][["duration"]][["value"]]
	  commutes$commute_distance[i] = g_result[["rows"]][["elements"]][[1]][["distance"]][["value"]]
	}

	write_xlsx(commutes,"$temp/FOCAL_commutes.xlsx")
	
	*/
}

if `readInTimes' == 1 {
	import excel using "$temp/FOCAL_commutes.xlsx", clear firstrow
	
	replace school_latitude = round(school_latitude,.00001)
	replace school_longitude = round(school_longitude,.00001)

	save "$auxdata/FOCAL_commute_times", replace
	
}


